// Copyright 2024 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package research_graph.in_memory;

import 'clustering/util/dynamic_weight_threshold.proto';

message AffinityClustererConfig {
  // Number of times we perform single-linkage clustering. If num_iterations =
  // 0, produces a clustering in which each node is in its own cluster. Note
  // that setting this to k is equivalent to setting compression_rounds
  // parameter of distributed affinity clustering to k-1.
  optional int32 num_iterations = 1 [default = 1];

  message WeightThresholdsSequence {
    repeated double thresholds = 1 [packed = true];
  }

  // Specifies the edge weight threshold in each iteration of the clustering.
  // In each iteration, edges of weight smaller than the threshold are ignored.
  oneof weight_threshold_config {
    // A fixed threshold in each round.
    double weight_threshold = 2;

    // A fixed sequence of the thresholds.
    // NOTE: If num_iterations > length of the list, then the last threshold
    // specified is used for all iterations beyond the length of the list.
    WeightThresholdsSequence per_iteration_weight_thresholds = 7;

    // Dynamically change the weight threshold in each iteration.
    DynamicWeightThresholdConfig dynamic_weight_threshold_config = 8;
  }

  // Specifies how edge weights are aggregated when computing a compressed graph
  // for subsequent iterations. Let S = set of edge weights between two
  // clusters, X, Y = total number of nodes in each cluster. With these
  // definitions, we use the following formulas:
  enum EdgeAggregationFunction {
    // sum(S) / (X*Y)
    DEFAULT_AVERAGE = 0;
    // max(S)
    MAX = 1;
    // sum(S)
    SUM = 2;
    // sum(S) / min(X, Y)
    CUT_SPARSITY = 3;
    // Let s_0, ..., s_{|S|-1} be a nondecreasing ordering of S. Then,
    // pick edge weight s_N such that N is percentile_linkage_value * (|S|-1).
    PERCENTILE = 4;
    // sum(S) / count(S)
    EXPLICIT_AVERAGE = 5;
  }
  optional EdgeAggregationFunction edge_aggregation_function = 3;

  // Specifies a set of conditions that qualify cluster as "active".
  // An unset field defines a condition that's always satisfied.
  message ActiveClusterCondition {
    // Minimum density, that is total edge weight divided by number of
    // (unordered) node pairs.
    optional double min_density = 1;
    // Sum of weights of edges leaving the cluster divided by the total weight
    // of edges incident to all nodes in the cluster.
    optional double min_conductance = 2;
  }

  // A possible ways in which a cluster may qualify as "active". A cluster is
  // active if it satisfies at least one of the conditions listed in this field.
  // If the field is empty, every cluster is active.
  repeated ActiveClusterCondition active_cluster_conditions = 4;
}

// Consider a graph with vertex set V, edge set E, non-negative vertex weights
// k_u, edge weights w_uv, and a "resolution" parameter which must be
// non-negative. We define "rescaled" edge weights w'_uv for all u, v, in V as:
//             { 0                                if u == v
//             {  w_uv - edge_weight_offset -     if {u, v} in E,
//   w'_{uv} = {    resolution k_u k_v
//             { -resolution k_u k_v    otherwise
// The correlation clustering objective is to maximize
//   sum_{u, v in the same cluster} w'_uv,
// which is equivalent (up to sign and an additive constant) to the
// "maximizing agreements" and "minimizing disagreements" formulations of
// correlation clustering that are used in the approximation algorithms
// literature.
//
// To optimize this objective we use local search. We start with each vertex in
// its own cluster. We consider moves of the following form: move all vertices
// in a "move set" S of vertices to either one of the existing clusters or to a
// newly created cluster. We currently consider the following options for S:
//  - Each vertex in a singleton move set. This reduces to the classic single
//    vertex moves.
//  - One move set per current cluster with all the vertices currently in it.
//    With these move sets we're effectively considering merging two clusters.
// The local search proceeds with effectively three nested loops. The outermost
// loop is over the num_iterations iterations. The middle loop is over the four
// move types listed above. The inner loop is over move sets of the particular
// type. For each move set considered we move that move set to the cluster that
// improves the objective the most if an improving move exists.
// Next available tag: 10
message CorrelationClustererConfig {
  // Parameters used by both CorrelationClusterer and
  // ParallelCorrelationClusterer
  // The next two fields control how the rescaled edge weights are calculated.
  // See comment above CorrelationClustererConfig.
  optional double resolution = 1;
  optional double edge_weight_offset = 2;

  // Parameters only used by Correlation Clusterer
  optional uint32 random_seed = 3;
  // Number of local improvement iterations. Each iteration has runtime linear
  // in the number of edges.
  // By default, or if non-positive, a reasonable value is used, currently 10.
  optional int64 num_iterations = 4;

  // Parameters if Louvain is chosen for the clustering_moves_method.
  optional LouvainConfig louvain_config = 8;

  // Specifies the algorithm to use for correlation clustering. Note that
  // LOUVAIN is for benchmarking purposes only, and DEFAULT_CLUSTER_MOVES should
  // by default be used.
  enum ClusteringMovesMethod {
    // This method involves alternating between single vertex best moves and
    // entire cluster best moves. An iteration consists of one round of single
    // vertex best moves and one round of entire cluster best moves. The number
    // of iterations is as given in num_iterations.
    DEFAULT_CLUSTER_MOVES = 0;
    // This method performs the classic Louvain algorithm, where after
    // rounds of best moves converge, the algorithm compresses clusters into
    // nodes and then repeats this process on the compressed graph. The
    // parameters using this algorithm are given in louvain_config.
    LOUVAIN = 1;
  }
  optional ClusteringMovesMethod clustering_moves_method = 9
      [default = DEFAULT_CLUSTER_MOVES];
}

// This config is for clustering using the Louvain algorithm, where the
// objective is given by another config.
message LouvainConfig {
  // Max number of rounds (of best moves and compression) to run.
  optional int64 num_iterations = 1;

  // Number of best moves rounds to run. This is primarily for parallel
  // Louvain, which may not terminate otherwise.
  optional int64 num_inner_iterations = 2;
}

// Config for InMemoryClusterer subclasses. When adding a new subclass:
//  a) add a new proto definition to this file,
//  b) add this proto to the config oneof in ClustererConfig.
message ClustererConfig {
  oneof config {
    // Use this to pass parameters to experimental partitioners and
    // partitioners defined in unittests.
    bytes parameter_string = 1;
    AffinityClustererConfig affinity_clusterer_config = 4;
    CorrelationClustererConfig correlation_clusterer_config = 7;
  }
}
